For this Seurat analysis, we used the following parameters:

pars <- as.data.frame(do.call("rbind", params))
colnames(pars) <- "parameters_used"
pars

Starting with this Seurat object from SCBA output (integrated on source name).

path <- "/fast/groups/cubi/work/milekm/mireille/de/rerun/sc-batch-analysis-rerun/out_rds"
sobj <- readRDS(file.path(path,params$path_to_object))

DimPlot(sobj, label=T, repel =T)

Look at some quality control parameters.

DefaultAssay(sobj) <- "RNA"

FeaturePlot(sobj, features = c("pct.mito", "pct.ribo", "Malat1", "Neat1"))

Show % cells per cluster per replicate to get an idea of reproducibility.

clusters <- sobj[[]] %>% 
  pull(seurat_clusters) %>% 
  unique() %>% as.character() %>% 
  as.numeric() %>% 
  sort() %>% 
  as.character()

sobj[[]] %>%
  select(sample, group, seurat_clusters,age,strain,compartment,timepoint) %>%
  group_by(sample, group, seurat_clusters,age,strain,compartment,timepoint) %>%
  summarise(ncells=n()) %>%
  ungroup() %>%
  group_by(sample) %>%
  mutate(cells_in_sample=sum(ncells), cell_proportion=ncells/sum(ncells)*100) %>%
  mutate(seurat_clusters = factor(seurat_clusters, levels = clusters)) %>%
  ggplot(aes(seurat_clusters, cell_proportion, fill=timepoint)) + 
  geom_boxplot(position=position_dodge(width=.5),width=.5, outlier.shape = 1, outlier.colour = "red")+
  geom_point(position=position_dodge(width=.5),colour='grey30', shape=1, size=2)+
   facet_wrap(~age+strain+compartment, scales = "free", ncol = 1) +
  theme(axis.text.x = element_text(angle=45,vjust=1,hjust=1))+
  xlab("")+ylab("% cells_in_sample")

Which proteins were detected in the ADT assay?

# which proteins are detected in ADT?
GetAssayData(sobj, assay = "Antibody.Capture", slot = "data") %>% rownames()
##  [1] "CD335" "CD3"   "CD4"   "CD8a"  "CD19"  "HTO7"  "HTO8"  "HTO9"  "HTO10"
## [10] "HTO11" "HTO12" "HTO13" "HTO14" "HTO15" "HTO16" "HTO17" "HTO18" "HTO19"
## [19] "HTO20"

Possible outliers according to cell proportion analysis: 12w_nonSPF_5days_fx, 26w_nonSPF_5days_fx, 26w SPF 5days fx

Show RNA and ADT signal for the detected molecules.

DefaultAssay(sobj) <- "Antibody.Capture"

sobj <- sobj %>%
  NormalizeData(normalization.method = "CLR", margin = 2, assay = "Antibody.Capture")

DefaultAssay(sobj) <- "Antibody.Capture"

sobj %>%
  FeaturePlot(features=c("CD19"))

DefaultAssay(sobj) <- "RNA"
sobj %>%
  FeaturePlot(features=c("Cd19"))

DefaultAssay(sobj) <- "RNA"
sobj %>%
  FeaturePlot(features=c("Foxp3"))

sobj %>%
  FeaturePlot(features=c("Cd3e"))

DefaultAssay(sobj) <- "Antibody.Capture"
sobj %>%
  FeaturePlot(features=c("CD3"))

sobj %>%
  FeaturePlot(features=c("CD4"))

sobj %>%
  FeaturePlot(features=c("CD335"))

Show dotplot for gene expression in clusters.

marks <- c("Siglech","S100a8","Csf1r","Cd3g","Klrd1","Cd79a","Vpreb1","Ebf1","Cd74","Ptprc","Tfrc","Hbb-bt","Kit","Flt3","Gata1"," Itga2b","Mpo","Elane","Ms4a2","Cd4","Cd8a","Cd19","Cd34","Cd27","Itga4","Itgax","Siglecf","Ccr3","Ly96","Chit1","Ctsk","Nfatc1","Atp6v0d2","Acp5","Ocstamp","Cd14", "Cxcr4", "Cx3cr1", "Ccr2","Mt3","Mmp9","Vcam1","C1qa","Cd5l","C1qc","Pf4")

Idents(sobj) <- "seurat_clusters"
DotPlot(sobj, assay = "RNA", features = marks) +
  theme(axis.text.x = element_text(angle=90,hjust=1,vjust=.5))

Show de novo markers per cluster (RNA assay).

DefaultAssay(sobj) <- "RNA"
Idents(sobj) <- "seurat_clusters"

markers <- FindAllMarkers(sobj, only.pos = TRUE, min.pct = 0.25,
                          thresh.use = 0.25,
                          print.bar=FALSE, verbose=FALSE)

write.csv(markers,'310124_cluster_markers_rna.csv',row.names=FALSE)
markers <- read.csv('310124_cluster_markers_rna.csv',
                    stringsAsFactors=FALSE)

top5 <- markers %>%
  group_by(cluster) %>%
  top_n(5, avg_log2FC)

top10 <- markers %>%
  group_by(cluster) %>%
  top_n(10, avg_log2FC)

DotPlot(sobj, features = unique(top5$gene)) +
  theme_classic() +
  theme(axis.text.x=element_text(angle=90,hjust=1,vjust=.5))+
  ggtitle("top5 markers per cluster")

Show de novo markers for the ADT assay.

DefaultAssay(sobj) <- "Antibody.Capture"
Idents(sobj) <- "seurat_clusters"

markers <- FindAllMarkers(sobj, only.pos = TRUE, min.pct = 0.25,
                          thresh.use = 0.25,
                          print.bar=FALSE, verbose=FALSE)

write.csv(markers,'310124_cluster_markers_adt.csv',row.names=FALSE)
markers <- read.csv('310124_cluster_markers_adt.csv',
                    stringsAsFactors=FALSE)

top5 <- markers %>%
  group_by(cluster) %>%
  top_n(5, avg_log2FC)

top10 <- markers %>%
  group_by(cluster) %>%
  top_n(10, avg_log2FC)

DotPlot(sobj, features = unique(top5$gene)) +
  theme_classic() +
  theme(axis.text.x=element_text(angle=90,hjust=1,vjust=.5))+
  ggtitle("top5 markers per cluster")

Perform label transfer from the Tabula Muris, droplet dataset (FACS does not work for the label transfer here.)

DefaultAssay(sobj) <- "RNA"
#this is from tabula muris https://figshare.com/articles/dataset/Robject_files_for_tissues_processed_by_Seurat/5821263

load("references_bone_marrow/droplet_Marrow_seurat_tiss.Robj")
ref1 <- UpdateSeuratObject(tiss)

# load("references_bone_marrow/facs_Marrow_seurat_tiss.Robj")
# ref2<-UpdateSeuratObject(tiss)

anchors <- FindTransferAnchors(reference = ref1, query = sobj, dims = 1:20, reference.reduction = "pca")
predictions <- TransferData(anchorset = anchors, refdata = ref1$cell_ontology_class, dims = 1:20)
sobj <- AddMetaData(sobj, metadata = predictions)

# DimPlot(sobj, group.by="predicted.id")

DimPlot(sobj, group.by="predicted.id", label = T, repel = T)

Show distributions of label transfer scores.

cols <- colnames(sobj[[]])[grepl("predict", colnames(sobj[[]]))]

sobj[[]] %>%
  tibble::rownames_to_column("cells") %>%
    select(all_of(c("cells", cols))) %>%
  mutate(cell_id = paste0(cells, "_", predicted.id)) %>%
  select(-cells, -predicted.id) %>%
  pivot_longer(-cell_id, values_to = "score") %>%
  group_by(cell_id) %>%
  summarise(max_score = max(score)) %>%
  mutate(cell_type = sub(".*_", "", cell_id)) %>%
  ggplot(aes(cell_type, max_score, fill = cell_type)) + 
  geom_boxplot() +
  theme(axis.text.x=element_text(angle=90, hjust=1,vjust=.5))

Lets look at cell proportions again, this time by annotated cell type.

sobj[[]] %>%
  select(sample, group, predicted.id,age,strain,compartment,timepoint) %>%
  group_by(sample, group, predicted.id,age,strain,compartment,timepoint) %>%
  summarise(ncells=n()) %>%
  ungroup() %>%
  group_by(sample) %>%
  mutate(cells_in_sample=sum(ncells), cell_proportion=ncells/sum(ncells)*100) %>%
  ggplot(aes(predicted.id, cell_proportion, fill=timepoint)) + 
  geom_boxplot(position=position_dodge(width=.5),width=.5, outlier.shape = 1, outlier.colour = "red")+
  geom_point(position=position_dodge(width=.5),colour='grey30', shape=1, size=2)+
   facet_wrap(~age+strain+compartment, scales = "free", ncol = 1) +
  theme(axis.text.x = element_text(angle=45,vjust=1,hjust=1))+
  xlab("")+ylab("% cells_in_sample")

Another dotplot of the cell markers after cell annotation.

Idents(sobj) <- "predicted.id"
DotPlot(sobj, assay = "RNA", features = marks) +
  theme(axis.text.x = element_text(angle=90,hjust=1,vjust=.5))

Test for significant differences in cell proportions.

samples <- sobj[[]] %>% 
  pull(sample) %>% 
  unique()

cell_types <- sobj[[]] %>% 
  pull(predicted.id) %>% 
  unique()

sample_id <- expand_grid(samples, cell_types) %>%
  mutate(sample_id = paste0(samples,";", cell_types)) %>%
  select(-samples, -cell_types)

ncells <- sobj[[]] %>%
  select(sample, predicted.id) %>%
  group_by(sample, predicted.id,.drop = FALSE) %>%
  summarise(ncells=n()) %>%
  ungroup() %>%
  mutate(sample_id=paste0(sample,";",predicted.id)) %>%
  select(-sample, -predicted.id)

tot_cells <- sobj[[]] %>%
  select(sample) %>%
  group_by(sample,.drop = FALSE) %>%
  summarise(tot_cells=n()) %>%
  ungroup() 

df <- sample_id %>%
  left_join(ncells) %>%
  mutate(ncells=ifelse(is.na(ncells), 0, ncells)) %>%
  mutate(sample = sub(";.*", "", sample_id)) %>%
  left_join(tot_cells) %>%
  mutate(group=sub(".*-","",sub("_","-",sub("_","-",sub(";.*","",sample_id))))) %>%
  mutate(predicted.id=sub(".*;", "", sample_id)) %>%
  mutate(cell_proportion=ncells/tot_cells*100) %>%
  mutate(age=sub("_.*","",group)) %>%
  mutate(strain=sub("_.*","",sub(".*;", "", sub("_",";", group)))) %>%
  mutate(compartment=sub(".*_", "", group)) %>%
  mutate(timepoint=sub("_.*", "",sub(".*;", "",sub("_", ";",sub("_", ";", group))))) %>%
  mutate(group_cell_type=paste0(group,";",predicted.id))

day5 <- df %>%
  filter(timepoint=="5days") %>%
pull(group_cell_type) %>% unique()

day2 <- df %>%
  filter(timepoint=="2days") %>%
pull(group_cell_type) %>% unique()

comparisons_paired <- Map(c, day5,day2)
names(comparisons_paired) <- NULL

res_ttest <- df %>% 
  pairwise_t_test(cell_proportion~group_cell_type, comparisons=comparisons_paired, paired = T, p.adjust.method = "BH") %>%
  mutate(condition1=sub(";.*","", group1),
         condition2=sub(";.*","", group2),
         cell_type=sub(".*;","",group1)) %>%
  select(-group1,-group2)

DT::datatable(res_ttest,rownames=FALSE, extensions = 'Buttons', options = list(dom='frtBip', buttons = c('csv')))
# res_ttest_unpaired <- df %>%
#   split(f=.$predicted.id) %>%
#   lapply(function(x) x %>% 
#   pairwise_t_test(cell_proportion~group, p.adjust.method = "BH", paired=F)) %>%
#   bind_rows(.id="cell_type")
# 
# DT::datatable(res_ttest,rownames=FALSE, extensions = 'Buttons', options = list(dom='frtBip', buttons = c('csv')))
# cell_type <- "immature B cell"
# 
# df %>%
#   filter(predicted.id== cell_type) %>%
#   mutate(age_strain=paste0(age,"_", strain)) %>%
#   mutate(compartment=factor(compartment, levels=c("fx", "proximal", "distal"))) %>%
#   ggplot(aes(age_strain, cell_proportion, fill=timepoint)) + 
#   geom_boxplot(position=position_dodge(width=.5),width=.5, outlier.shape = 1, outlier.colour = "red")+
#   geom_point(position=position_dodge(width=.5),colour='grey30', shape=1, size=2)+
#   facet_wrap(~compartment, scales = "free_x")+
#   scale_fill_manual(values=c("dodgerblue3", "orange2"))+
#   theme(axis.text.x = element_text(angle=45,vjust=1,hjust=1))+
#   xlab("")+ylab("% cells_in_sample")+
#   ggtitle(cell_type)

plot_proportions <- function(cell_type){
  df %>%
    filter(predicted.id== cell_type) %>%
    mutate(age_strain=paste0(age,"_", strain)) %>%
    mutate(compartment=factor(compartment, levels=c("fx", "proximal", "distal"))) %>%
    ggplot(aes(age_strain, cell_proportion, fill=timepoint)) + 
    geom_boxplot(position=position_dodge(width=.5),width=.5, outlier.shape = 1, outlier.colour = "red")+
    geom_point(position=position_dodge(width=.5),colour='grey30', shape=1, size=2)+
    facet_wrap(~compartment, scales = "free_x")+
    scale_fill_manual(values=c("dodgerblue3", "orange2"))+
    theme(axis.text.x = element_text(angle=45,vjust=1,hjust=1))+
    xlab("")+ylab("% cells_in_sample")+
    ggtitle(cell_type)
}


lapply(unique(df$predicted.id), plot_proportions)

saveRDS(sobj, "sobj.annotated.310124.rerun.rds")
sessionInfo()
## R version 4.3.1 (2023-06-16)
## Platform: x86_64-conda-linux-gnu (64-bit)
## Running under: Rocky Linux 8.7 (Green Obsidian)
## 
## Matrix products: default
## BLAS/LAPACK: /data/cephfs-1/work/groups/cubi/users/milekm_c/miniconda/envs/R-fixed-biomart/lib/libopenblasp-r0.3.24.so;  LAPACK version 3.11.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Berlin
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] stringr_1.5.0               ggrepel_0.9.3              
##  [3] DESeq2_1.40.2               SummarizedExperiment_1.30.2
##  [5] Biobase_2.60.0              MatrixGenerics_1.12.2      
##  [7] matrixStats_1.0.0           GenomicRanges_1.52.0       
##  [9] GenomeInfoDb_1.36.1         IRanges_2.34.1             
## [11] S4Vectors_0.38.1            BiocGenerics_0.46.0        
## [13] rstatix_0.7.2               cowplot_1.1.1              
## [15] ggplot2_3.4.3               gtools_3.9.4               
## [17] tidyr_1.3.0                 dplyr_1.1.3                
## [19] SeuratObject_4.1.4          Seurat_4.4.0               
## 
## loaded via a namespace (and not attached):
##   [1] RColorBrewer_1.1-3      rstudioapi_0.15.0       jsonlite_1.8.7         
##   [4] magrittr_2.0.3          spatstat.utils_3.0-3    farver_2.1.1           
##   [7] rmarkdown_2.25          zlibbioc_1.46.0         vctrs_0.6.3            
##  [10] ROCR_1.0-11             spatstat.explore_3.2-3  RCurl_1.98-1.12        
##  [13] S4Arrays_1.0.4          htmltools_0.5.6         broom_1.0.5            
##  [16] sass_0.4.7              sctransform_0.4.0       parallelly_1.36.0      
##  [19] KernSmooth_2.23-22      bslib_0.5.1             htmlwidgets_1.6.2      
##  [22] ica_1.0-3               plyr_1.8.9              plotly_4.10.2          
##  [25] zoo_1.8-12              cachem_1.0.8            igraph_1.5.1           
##  [28] mime_0.12               lifecycle_1.0.3         pkgconfig_2.0.3        
##  [31] Matrix_1.6-1.1          R6_2.5.1                fastmap_1.1.1          
##  [34] GenomeInfoDbData_1.2.10 fitdistrplus_1.1-11     future_1.33.0          
##  [37] shiny_1.7.5             digest_0.6.33           colorspace_2.1-0       
##  [40] patchwork_1.1.3         tensor_1.5              irlba_2.3.5.1          
##  [43] crosstalk_1.2.0         labeling_0.4.3          progressr_0.14.0       
##  [46] fansi_1.0.4             spatstat.sparse_3.0-2   httr_1.4.7             
##  [49] polyclip_1.10-6         abind_1.4-5             compiler_4.3.1         
##  [52] withr_2.5.1             backports_1.4.1         BiocParallel_1.34.2    
##  [55] carData_3.0-5           MASS_7.3-60             DelayedArray_0.26.6    
##  [58] tools_4.3.1             lmtest_0.9-40           httpuv_1.6.11          
##  [61] future.apply_1.11.0     goftest_1.2-3           glue_1.6.2             
##  [64] nlme_3.1-163            promises_1.2.1          grid_4.3.1             
##  [67] Rtsne_0.16              cluster_2.1.4           reshape2_1.4.4         
##  [70] generics_0.1.3          gtable_0.3.4            spatstat.data_3.0-1    
##  [73] data.table_1.14.8       XVector_0.40.0          sp_2.1-0               
##  [76] car_3.1-2               utf8_1.2.3              spatstat.geom_3.2-5    
##  [79] RcppAnnoy_0.0.21        RANN_2.6.1              pillar_1.9.0           
##  [82] limma_3.56.2            later_1.3.1             splines_4.3.1          
##  [85] lattice_0.21-9          survival_3.5-7          deldir_1.0-9           
##  [88] tidyselect_1.2.0        locfit_1.5-9.8          miniUI_0.1.1.1         
##  [91] pbapply_1.7-2           knitr_1.44              gridExtra_2.3          
##  [94] scattermore_1.2         xfun_0.40               DT_0.28                
##  [97] stringi_1.7.12          lazyeval_0.2.2          yaml_2.3.7             
## [100] evaluate_0.22           codetools_0.2-19        tibble_3.2.1           
## [103] cli_3.6.1               uwot_0.1.16             xtable_1.8-4           
## [106] reticulate_1.32.0       munsell_0.5.0           jquerylib_0.1.4        
## [109] Rcpp_1.0.11             globals_0.16.2          spatstat.random_3.1-6  
## [112] png_0.1-8               parallel_4.3.1          ellipsis_0.3.2         
## [115] bitops_1.0-7            listenv_0.9.0           viridisLite_0.4.2      
## [118] scales_1.2.1            ggridges_0.5.4          crayon_1.5.2           
## [121] leiden_0.4.3            purrr_1.0.2             rlang_1.1.1